On commence par charger les données, en ne gardant que les communes de plus de 10 000 habitants (i.e. nombre de personnes en RP > 10 000 en 2022) :
data = fread("../base_2012_2022.csv", encoding = "UTF-8")
data_com = data[annee == 2022, .(pop = sum(nb_personnes_en_RP)),
by = c("COM", "annee")]
liste_com = data_com[pop >10000, COM]
df = data[COM %in% liste_com]
On réalise l’APC à l’aide du package FactoMineR. On réalise plusieurs ACP.
df_num <- df[annee==2017, .SD, .SDcols = is.numeric]
df_num = df_num[,-"annee"]
res <- PCA(df_num,
scale.unit = TRUE,
graph = FALSE)
summary(res)
##
## Call:
## PCA(X = df_num, scale.unit = TRUE, graph = FALSE)
##
##
## Eigenvalues
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5 Dim.6 Dim.7
## Variance 13.638 3.641 2.284 1.397 1.031 0.827 0.740
## % of var. 52.454 14.006 8.785 5.374 3.967 3.180 2.848
## Cumulative % of var. 52.454 66.460 75.245 80.619 84.586 87.766 90.614
## Dim.8 Dim.9 Dim.10 Dim.11 Dim.12 Dim.13 Dim.14
## Variance 0.470 0.395 0.366 0.292 0.238 0.199 0.181
## % of var. 1.809 1.521 1.409 1.123 0.914 0.764 0.696
## Cumulative % of var. 92.423 93.943 95.352 96.475 97.389 98.153 98.849
## Dim.15 Dim.16 Dim.17 Dim.18 Dim.19 Dim.20 Dim.21
## Variance 0.121 0.097 0.061 0.015 0.006 0.000 0.000
## % of var. 0.466 0.372 0.233 0.057 0.021 0.001 0.000
## Cumulative % of var. 99.315 99.688 99.921 99.978 99.999 100.000 100.000
## Dim.22 Dim.23 Dim.24 Dim.25 Dim.26
## Variance 0.000 0.000 0.000 0.000 0.000
## % of var. 0.000 0.000 0.000 0.000 0.000
## Cumulative % of var. 100.000 100.000 100.000 100.000 100.000
##
## Individuals (the 10 first)
## Dist Dim.1 ctr cos2 Dim.2 ctr
## 1 | 2.739 | -2.041 0.002 0.556 | -0.342 0.000
## 2 | 5.449 | 4.272 0.010 0.615 | -0.937 0.002
## 3 | 6.862 | 6.031 0.020 0.773 | 0.560 0.001
## 4 | 8.166 | 6.617 0.024 0.657 | 4.320 0.038
## 5 | 4.888 | 3.172 0.005 0.421 | -1.595 0.005
## 6 | 2.516 | 0.418 0.000 0.028 | 1.265 0.003
## 7 | 2.427 | 0.570 0.000 0.055 | 1.563 0.005
## 8 | 2.745 | 0.538 0.000 0.038 | -0.291 0.000
## 9 | 3.267 | -2.534 0.003 0.602 | 0.955 0.002
## 10 | 6.491 | 3.841 0.008 0.350 | 4.527 0.041
## cos2 Dim.3 ctr cos2
## 1 0.016 | -0.831 0.002 0.092 |
## 2 0.030 | -2.434 0.019 0.200 |
## 3 0.007 | -2.515 0.020 0.134 |
## 4 0.280 | 0.118 0.000 0.000 |
## 5 0.107 | -2.035 0.013 0.173 |
## 6 0.253 | -1.133 0.004 0.203 |
## 7 0.415 | -0.925 0.003 0.145 |
## 8 0.011 | -2.287 0.017 0.694 |
## 9 0.085 | -1.022 0.003 0.098 |
## 10 0.486 | -1.157 0.004 0.032 |
##
## Variables (the 10 first)
## Dim.1 ctr cos2 Dim.2 ctr cos2
## nb_menages | 0.975 6.967 0.950 | -0.100 0.277 0.010 |
## nb_personnes_menage | 0.966 6.838 0.933 | 0.130 0.463 0.017 |
## nb_logements | 0.838 5.145 0.702 | -0.124 0.419 0.015 |
## nb_RP_1_piece | 0.458 1.535 0.209 | -0.611 10.254 0.373 |
## nb_RP_2_pieces | 0.683 3.424 0.467 | -0.552 8.355 0.304 |
## nb_RP_3_pieces | 0.793 4.607 0.628 | -0.287 2.265 0.082 |
## nb_RP_4_pieces | 0.707 3.670 0.500 | 0.374 3.838 0.140 |
## nb_RP_5_piece_et_plus | 0.429 1.348 0.184 | 0.735 14.820 0.540 |
## nb_RP_en_loc | 0.742 4.032 0.550 | -0.617 10.440 0.380 |
## nb_RP_proprio | 0.666 3.256 0.444 | 0.628 10.840 0.395 |
## Dim.3 ctr cos2
## nb_menages 0.076 0.254 0.006 |
## nb_personnes_menage -0.134 0.783 0.018 |
## nb_logements 0.201 1.762 0.040 |
## nb_RP_1_piece 0.415 7.550 0.172 |
## nb_RP_2_pieces 0.265 3.063 0.070 |
## nb_RP_3_pieces -0.177 1.373 0.031 |
## nb_RP_4_pieces -0.362 5.729 0.131 |
## nb_RP_5_piece_et_plus 0.061 0.163 0.004 |
## nb_RP_en_loc -0.149 0.967 0.022 |
## nb_RP_proprio 0.298 3.894 0.089 |
# explor(res)
fviz_pca_biplot(res, invisible = "ind")
fviz_pca_var(
res,
repel = TRUE,
col.var = "contrib"
)
fviz_pca_var(
res,
col.var = "contrib",
select.var = list(contrib = 5),
repel = TRUE
)
fviz_pca_var(
res,
col.var = "cos2",
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
repel = TRUE
)
fviz_pca_var(
res,
col.var = "cos2",
select.var = list(cos2 = 0.90),
repel = TRUE,
gradient.cols = c("#E7B800", "#FC4E07"),
)
df_num <- df[, .SD, .SDcols = is.numeric]
res <- PCA(df_num,
scale.unit = TRUE,
graph = FALSE)
## Warning in PCA(df_num, scale.unit = TRUE, graph = FALSE): Missing values are
## imputed by the mean of the variable: you should use the imputePCA function of
## the missMDA package
summary(res)
##
## Call:
## PCA(X = df_num, scale.unit = TRUE, graph = FALSE)
##
##
## Eigenvalues
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5 Dim.6 Dim.7
## Variance 13.660 3.637 2.273 1.397 1.042 0.993 0.817
## % of var. 50.592 13.471 8.418 5.172 3.858 3.679 3.027
## Cumulative % of var. 50.592 64.062 72.480 77.653 81.511 85.190 88.217
## Dim.8 Dim.9 Dim.10 Dim.11 Dim.12 Dim.13 Dim.14
## Variance 0.725 0.475 0.396 0.369 0.286 0.244 0.201
## % of var. 2.685 1.759 1.468 1.365 1.059 0.905 0.746
## Cumulative % of var. 90.903 92.661 94.129 95.494 96.553 97.458 98.203
## Dim.15 Dim.16 Dim.17 Dim.18 Dim.19 Dim.20 Dim.21
## Variance 0.181 0.121 0.098 0.064 0.015 0.005 0.000
## % of var. 0.671 0.448 0.362 0.236 0.057 0.020 0.001
## Cumulative % of var. 98.875 99.322 99.684 99.920 99.977 99.996 99.998
## Dim.22 Dim.23 Dim.24 Dim.25 Dim.26 Dim.27
## Variance 0.000 0.000 0.000 0.000 0.000 0.000
## % of var. 0.001 0.001 0.000 0.000 0.000 0.000
## Cumulative % of var. 99.999 100.000 100.000 100.000 100.000 100.000
##
## Individuals (the 10 first)
## Dist Dim.1 ctr cos2 Dim.2 ctr
## 1 | 3.296 | -2.668 0.000 0.655 | 0.423 0.000
## 2 | 6.202 | 4.306 0.001 0.482 | -0.001 0.000
## 3 | 5.736 | 4.447 0.001 0.601 | 0.228 0.000
## 4 | 9.264 | 6.581 0.002 0.505 | 5.652 0.006
## 5 | 4.108 | -3.633 0.001 0.782 | 0.381 0.000
## 6 | 4.532 | 3.037 0.000 0.449 | -0.834 0.000
## 7 | 2.870 | 0.453 0.000 0.025 | 1.400 0.000
## 8 | 3.268 | 0.228 0.000 0.005 | -0.258 0.000
## 9 | 3.568 | -2.653 0.000 0.553 | 0.627 0.000
## 10 | 4.011 | -0.025 0.000 0.000 | -2.277 0.001
## cos2 Dim.3 ctr cos2
## 1 0.016 | -0.657 0.000 0.040 |
## 2 0.000 | -3.789 0.004 0.373 |
## 3 0.002 | -2.597 0.002 0.205 |
## 4 0.372 | 0.157 0.000 0.000 |
## 5 0.009 | -0.377 0.000 0.008 |
## 6 0.034 | -1.593 0.001 0.123 |
## 7 0.238 | -1.293 0.000 0.203 |
## 8 0.006 | -2.453 0.002 0.563 |
## 9 0.031 | -1.288 0.000 0.130 |
## 10 0.322 | 0.623 0.000 0.024 |
##
## Variables (the 10 first)
## Dim.1 ctr cos2 Dim.2 ctr cos2
## nb_menages | 0.974 6.952 0.950 | -0.100 0.274 0.010 |
## nb_personnes_menage | 0.965 6.820 0.932 | 0.131 0.475 0.017 |
## nb_logements | 0.841 5.179 0.707 | -0.123 0.414 0.015 |
## nb_RP_1_piece | 0.459 1.546 0.211 | -0.613 10.326 0.376 |
## nb_RP_2_pieces | 0.684 3.424 0.468 | -0.550 8.322 0.303 |
## nb_RP_3_pieces | 0.792 4.591 0.627 | -0.285 2.229 0.081 |
## nb_RP_4_pieces | 0.707 3.663 0.500 | 0.374 3.844 0.140 |
## nb_RP_5_piece_et_plus | 0.434 1.379 0.188 | 0.733 14.760 0.537 |
## nb_RP_en_loc | 0.742 4.032 0.551 | -0.616 10.430 0.379 |
## nb_RP_proprio | 0.670 3.281 0.448 | 0.625 10.732 0.390 |
## Dim.3 ctr cos2
## nb_menages 0.079 0.272 0.006 |
## nb_personnes_menage -0.132 0.771 0.018 |
## nb_logements 0.205 1.851 0.042 |
## nb_RP_1_piece 0.403 7.163 0.163 |
## nb_RP_2_pieces 0.265 3.096 0.070 |
## nb_RP_3_pieces -0.168 1.248 0.028 |
## nb_RP_4_pieces -0.358 5.625 0.128 |
## nb_RP_5_piece_et_plus 0.064 0.179 0.004 |
## nb_RP_en_loc -0.147 0.955 0.022 |
## nb_RP_proprio 0.300 3.963 0.090 |
# explor(res)
fviz_pca_biplot(res, invisible = "ind")
fviz_pca_var(
res,
repel = TRUE,
col.var = "contrib"
)
fviz_pca_var(
res,
col.var = "contrib",
select.var = list(contrib = 5),
repel = TRUE
)
fviz_pca_var(
res,
col.var = "cos2",
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
repel = TRUE
)
fviz_pca_var(
res,
col.var = "cos2",
select.var = list(cos2 = 0.90),
repel = TRUE,
gradient.cols = c("#E7B800", "#FC4E07"),
)
# ACP sur l’ensemble des données au format large
col_num = c("nb_menages", "nb_personnes_menage",
"nb_logements", "nb_RP_1_piece",
"nb_RP_2_pieces", "nb_RP_3_pieces",
"nb_RP_4_pieces", "nb_RP_5_piece_et_plus",
"nb_RP_en_loc", "nb_RP_proprio",
"nb_personnes_en_RP", "nb_personnes_en_RP_location",
"nb_personnes_en_RP_proprio", "nb_residences_second_ou_occ",
"nb_logements_vacants", "nb_RP",
"nb_actifs",
"nb_actifs_occ", "nb_chomeurs" ,
"nb_agriculteurs", "nb_commercants",
"nb_cadres", "nb_professions_inter")
df_large = dcast(df, formula = IRIS + COM ~ annee,
value.var = col_num)
df_num <- df_large[, .SD, .SDcols = is.numeric]
res <- PCA(df_num,
scale.unit = TRUE,
graph = FALSE)
## Warning in PCA(df_num, scale.unit = TRUE, graph = FALSE): Missing values are
## imputed by the mean of the variable: you should use the imputePCA function of
## the missMDA package
summary(res)
##
## Call:
## PCA(X = df_num, scale.unit = TRUE, graph = FALSE)
##
##
## Eigenvalues
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5 Dim.6 Dim.7
## Variance 131.107 37.336 18.712 14.663 9.069 7.143 5.431
## % of var. 51.821 14.757 7.396 5.796 3.585 2.823 2.147
## Cumulative % of var. 51.821 66.578 73.974 79.770 83.355 86.178 88.325
## Dim.8 Dim.9 Dim.10 Dim.11 Dim.12 Dim.13 Dim.14
## Variance 3.737 3.272 2.889 2.665 2.163 1.774 1.428
## % of var. 1.477 1.293 1.142 1.053 0.855 0.701 0.564
## Cumulative % of var. 89.802 91.095 92.237 93.290 94.145 94.846 95.411
## Dim.15 Dim.16 Dim.17 Dim.18 Dim.19 Dim.20 Dim.21
## Variance 1.088 0.855 0.757 0.701 0.615 0.477 0.468
## % of var. 0.430 0.338 0.299 0.277 0.243 0.188 0.185
## Cumulative % of var. 95.841 96.179 96.478 96.755 96.998 97.187 97.372
## Dim.22 Dim.23 Dim.24 Dim.25 Dim.26 Dim.27 Dim.28
## Variance 0.434 0.394 0.373 0.310 0.292 0.288 0.228
## % of var. 0.172 0.156 0.148 0.123 0.116 0.114 0.090
## Cumulative % of var. 97.543 97.699 97.846 97.969 98.085 98.199 98.289
## Dim.29 Dim.30 Dim.31 Dim.32 Dim.33 Dim.34 Dim.35
## Variance 0.209 0.204 0.190 0.170 0.165 0.160 0.148
## % of var. 0.083 0.081 0.075 0.067 0.065 0.063 0.059
## Cumulative % of var. 98.371 98.452 98.527 98.595 98.660 98.723 98.782
## Dim.36 Dim.37 Dim.38 Dim.39 Dim.40 Dim.41 Dim.42
## Variance 0.146 0.142 0.131 0.124 0.115 0.110 0.105
## % of var. 0.058 0.056 0.052 0.049 0.045 0.043 0.042
## Cumulative % of var. 98.839 98.896 98.948 98.996 99.042 99.085 99.127
## Dim.43 Dim.44 Dim.45 Dim.46 Dim.47 Dim.48 Dim.49
## Variance 0.090 0.086 0.081 0.077 0.073 0.070 0.066
## % of var. 0.036 0.034 0.032 0.031 0.029 0.028 0.026
## Cumulative % of var. 99.162 99.196 99.228 99.259 99.288 99.315 99.341
## Dim.50 Dim.51 Dim.52 Dim.53 Dim.54 Dim.55 Dim.56
## Variance 0.063 0.060 0.059 0.057 0.053 0.049 0.048
## % of var. 0.025 0.024 0.023 0.023 0.021 0.020 0.019
## Cumulative % of var. 99.366 99.390 99.413 99.435 99.457 99.476 99.495
## Dim.57 Dim.58 Dim.59 Dim.60 Dim.61 Dim.62 Dim.63
## Variance 0.046 0.044 0.042 0.041 0.038 0.037 0.036
## % of var. 0.018 0.017 0.017 0.016 0.015 0.014 0.014
## Cumulative % of var. 99.513 99.531 99.547 99.564 99.578 99.593 99.607
## Dim.64 Dim.65 Dim.66 Dim.67 Dim.68 Dim.69 Dim.70
## Variance 0.033 0.032 0.031 0.029 0.029 0.028 0.026
## % of var. 0.013 0.013 0.012 0.012 0.011 0.011 0.010
## Cumulative % of var. 99.620 99.633 99.645 99.657 99.668 99.679 99.689
## Dim.71 Dim.72 Dim.73 Dim.74 Dim.75 Dim.76 Dim.77
## Variance 0.025 0.024 0.024 0.023 0.023 0.021 0.021
## % of var. 0.010 0.010 0.009 0.009 0.009 0.008 0.008
## Cumulative % of var. 99.699 99.709 99.718 99.727 99.736 99.745 99.753
## Dim.78 Dim.79 Dim.80 Dim.81 Dim.82 Dim.83 Dim.84
## Variance 0.020 0.019 0.019 0.018 0.017 0.016 0.016
## % of var. 0.008 0.008 0.007 0.007 0.007 0.006 0.006
## Cumulative % of var. 99.761 99.769 99.776 99.783 99.790 99.796 99.802
## Dim.85 Dim.86 Dim.87 Dim.88 Dim.89 Dim.90 Dim.91
## Variance 0.015 0.015 0.015 0.014 0.014 0.013 0.013
## % of var. 0.006 0.006 0.006 0.006 0.005 0.005 0.005
## Cumulative % of var. 99.809 99.814 99.820 99.826 99.831 99.837 99.842
## Dim.92 Dim.93 Dim.94 Dim.95 Dim.96 Dim.97 Dim.98
## Variance 0.013 0.012 0.012 0.012 0.011 0.011 0.011
## % of var. 0.005 0.005 0.005 0.005 0.005 0.004 0.004
## Cumulative % of var. 99.847 99.852 99.857 99.861 99.866 99.870 99.875
## Dim.99 Dim.100 Dim.101 Dim.102 Dim.103 Dim.104 Dim.105
## Variance 0.011 0.011 0.011 0.010 0.010 0.010 0.010
## % of var. 0.004 0.004 0.004 0.004 0.004 0.004 0.004
## Cumulative % of var. 99.879 99.883 99.887 99.891 99.895 99.899 99.903
## Dim.106 Dim.107 Dim.108 Dim.109 Dim.110 Dim.111 Dim.112
## Variance 0.009 0.009 0.009 0.008 0.008 0.008 0.008
## % of var. 0.004 0.004 0.003 0.003 0.003 0.003 0.003
## Cumulative % of var. 99.907 99.910 99.913 99.917 99.920 99.923 99.926
## Dim.113 Dim.114 Dim.115 Dim.116 Dim.117 Dim.118 Dim.119
## Variance 0.008 0.007 0.007 0.007 0.007 0.006 0.006
## % of var. 0.003 0.003 0.003 0.003 0.003 0.003 0.003
## Cumulative % of var. 99.929 99.932 99.935 99.938 99.940 99.943 99.945
## Dim.120 Dim.121 Dim.122 Dim.123 Dim.124 Dim.125 Dim.126
## Variance 0.006 0.006 0.006 0.006 0.005 0.005 0.005
## % of var. 0.002 0.002 0.002 0.002 0.002 0.002 0.002
## Cumulative % of var. 99.948 99.950 99.952 99.955 99.957 99.959 99.961
## Dim.127 Dim.128 Dim.129 Dim.130 Dim.131 Dim.132 Dim.133
## Variance 0.005 0.005 0.005 0.004 0.004 0.004 0.004
## % of var. 0.002 0.002 0.002 0.002 0.002 0.002 0.002
## Cumulative % of var. 99.963 99.965 99.967 99.968 99.970 99.972 99.973
## Dim.134 Dim.135 Dim.136 Dim.137 Dim.138 Dim.139 Dim.140
## Variance 0.004 0.003 0.003 0.003 0.003 0.003 0.003
## % of var. 0.001 0.001 0.001 0.001 0.001 0.001 0.001
## Cumulative % of var. 99.975 99.976 99.977 99.978 99.979 99.980 99.981
## Dim.141 Dim.142 Dim.143 Dim.144 Dim.145 Dim.146 Dim.147
## Variance 0.003 0.002 0.002 0.002 0.002 0.002 0.002
## % of var. 0.001 0.001 0.001 0.001 0.001 0.001 0.001
## Cumulative % of var. 99.982 99.983 99.984 99.985 99.986 99.987 99.988
## Dim.148 Dim.149 Dim.150 Dim.151 Dim.152 Dim.153 Dim.154
## Variance 0.002 0.002 0.002 0.002 0.002 0.001 0.001
## % of var. 0.001 0.001 0.001 0.001 0.001 0.001 0.001
## Cumulative % of var. 99.989 99.989 99.990 99.991 99.991 99.992 99.992
## Dim.155 Dim.156 Dim.157 Dim.158 Dim.159 Dim.160 Dim.161
## Variance 0.001 0.001 0.001 0.001 0.001 0.001 0.001
## % of var. 0.001 0.000 0.000 0.000 0.000 0.000 0.000
## Cumulative % of var. 99.993 99.993 99.994 99.994 99.994 99.995 99.995
## Dim.162 Dim.163 Dim.164 Dim.165 Dim.166 Dim.167 Dim.168
## Variance 0.001 0.001 0.001 0.001 0.001 0.001 0.001
## % of var. 0.000 0.000 0.000 0.000 0.000 0.000 0.000
## Cumulative % of var. 99.996 99.996 99.996 99.996 99.997 99.997 99.997
## Dim.169 Dim.170 Dim.171 Dim.172 Dim.173 Dim.174 Dim.175
## Variance 0.001 0.001 0.001 0.001 0.000 0.000 0.000
## % of var. 0.000 0.000 0.000 0.000 0.000 0.000 0.000
## Cumulative % of var. 99.998 99.998 99.998 99.998 99.998 99.999 99.999
## Dim.176 Dim.177 Dim.178 Dim.179 Dim.180 Dim.181 Dim.182
## Variance 0.000 0.000 0.000 0.000 0.000 0.000 0.000
## % of var. 0.000 0.000 0.000 0.000 0.000 0.000 0.000
## Cumulative % of var. 99.999 99.999 99.999 99.999 99.999 99.999 99.999
## Dim.183 Dim.184 Dim.185 Dim.186 Dim.187 Dim.188 Dim.189
## Variance 0.000 0.000 0.000 0.000 0.000 0.000 0.000
## % of var. 0.000 0.000 0.000 0.000 0.000 0.000 0.000
## Cumulative % of var. 99.999 100.000 100.000 100.000 100.000 100.000 100.000
## Dim.190 Dim.191 Dim.192 Dim.193 Dim.194 Dim.195 Dim.196
## Variance 0.000 0.000 0.000 0.000 0.000 0.000 0.000
## % of var. 0.000 0.000 0.000 0.000 0.000 0.000 0.000
## Cumulative % of var. 100.000 100.000 100.000 100.000 100.000 100.000 100.000
## Dim.197 Dim.198 Dim.199 Dim.200 Dim.201 Dim.202 Dim.203
## Variance 0.000 0.000 0.000 0.000 0.000 0.000 0.000
## % of var. 0.000 0.000 0.000 0.000 0.000 0.000 0.000
## Cumulative % of var. 100.000 100.000 100.000 100.000 100.000 100.000 100.000
## Dim.204 Dim.205 Dim.206 Dim.207 Dim.208 Dim.209 Dim.210
## Variance 0.000 0.000 0.000 0.000 0.000 0.000 0.000
## % of var. 0.000 0.000 0.000 0.000 0.000 0.000 0.000
## Cumulative % of var. 100.000 100.000 100.000 100.000 100.000 100.000 100.000
## Dim.211 Dim.212 Dim.213 Dim.214 Dim.215 Dim.216 Dim.217
## Variance 0.000 0.000 0.000 0.000 0.000 0.000 0.000
## % of var. 0.000 0.000 0.000 0.000 0.000 0.000 0.000
## Cumulative % of var. 100.000 100.000 100.000 100.000 100.000 100.000 100.000
## Dim.218 Dim.219 Dim.220 Dim.221 Dim.222 Dim.223 Dim.224
## Variance 0.000 0.000 0.000 0.000 0.000 0.000 0.000
## % of var. 0.000 0.000 0.000 0.000 0.000 0.000 0.000
## Cumulative % of var. 100.000 100.000 100.000 100.000 100.000 100.000 100.000
## Dim.225 Dim.226 Dim.227 Dim.228 Dim.229 Dim.230 Dim.231
## Variance 0.000 0.000 0.000 0.000 0.000 0.000 0.000
## % of var. 0.000 0.000 0.000 0.000 0.000 0.000 0.000
## Cumulative % of var. 100.000 100.000 100.000 100.000 100.000 100.000 100.000
## Dim.232 Dim.233 Dim.234 Dim.235 Dim.236 Dim.237 Dim.238
## Variance 0.000 0.000 0.000 0.000 0.000 0.000 0.000
## % of var. 0.000 0.000 0.000 0.000 0.000 0.000 0.000
## Cumulative % of var. 100.000 100.000 100.000 100.000 100.000 100.000 100.000
## Dim.239 Dim.240 Dim.241 Dim.242 Dim.243 Dim.244 Dim.245
## Variance 0.000 0.000 0.000 0.000 0.000 0.000 0.000
## % of var. 0.000 0.000 0.000 0.000 0.000 0.000 0.000
## Cumulative % of var. 100.000 100.000 100.000 100.000 100.000 100.000 100.000
## Dim.246 Dim.247 Dim.248 Dim.249 Dim.250 Dim.251 Dim.252
## Variance 0.000 0.000 0.000 0.000 0.000 0.000 0.000
## % of var. 0.000 0.000 0.000 0.000 0.000 0.000 0.000
## Cumulative % of var. 100.000 100.000 100.000 100.000 100.000 100.000 100.000
## Dim.253
## Variance 0.000
## % of var. 0.000
## Cumulative % of var. 100.000
##
## Individuals (the 10 first)
## Dist Dim.1 ctr cos2 Dim.2
## 1 | 9.288 | -8.439 0.004 0.825 | -0.608
## 2 | 16.685 | 14.077 0.011 0.712 | -3.154
## 3 | 19.644 | 17.556 0.017 0.799 | 0.139
## 4 | 27.976 | 22.158 0.027 0.627 | 15.423
## 5 | 7.977 | -5.048 0.001 0.400 | 0.401
## 6 | 14.372 | 9.219 0.005 0.411 | -6.236
## 7 | 4.736 | -0.030 0.000 0.000 | 1.429
## 8 | 5.307 | 0.666 0.000 0.016 | 3.657
## 9 | 6.757 | 0.528 0.000 0.006 | -2.172
## 10 | 9.742 | -9.051 0.005 0.863 | 1.474
## ctr cos2 Dim.3 ctr cos2
## 1 0.000 0.004 | -0.016 0.000 0.000 |
## 2 0.002 0.036 | -6.644 0.017 0.159 |
## 3 0.000 0.000 | -6.632 0.017 0.114 |
## 4 0.046 0.304 | -1.480 0.001 0.003 |
## 5 0.000 0.003 | -0.104 0.000 0.000 |
## 6 0.008 0.188 | -2.880 0.003 0.040 |
## 7 0.000 0.091 | -0.608 0.000 0.017 |
## 8 0.003 0.475 | -1.579 0.001 0.089 |
## 9 0.001 0.103 | -5.391 0.011 0.637 |
## 10 0.000 0.023 | -1.389 0.001 0.020 |
##
## Variables (the 10 first)
## Dim.1 ctr cos2 Dim.2 ctr cos2
## nb_menages_2012 | 0.916 0.640 0.839 | -0.129 0.045 0.017
## nb_menages_2013 | 0.951 0.690 0.905 | -0.111 0.033 0.012
## nb_menages_2014 | 0.938 0.671 0.880 | -0.091 0.022 0.008
## nb_menages_2015 | 0.968 0.715 0.937 | -0.098 0.026 0.010
## nb_menages_2016 | 0.973 0.722 0.947 | -0.093 0.023 0.009
## nb_menages_2017 | 0.976 0.726 0.952 | -0.087 0.020 0.008
## nb_menages_2018 | 0.977 0.727 0.954 | -0.082 0.018 0.007
## nb_menages_2019 | 0.976 0.727 0.953 | -0.077 0.016 0.006
## nb_menages_2020 | 0.974 0.724 0.949 | -0.068 0.012 0.005
## nb_menages_2021 | 0.969 0.717 0.940 | -0.064 0.011 0.004
## Dim.3 ctr cos2
## nb_menages_2012 | 0.068 0.025 0.005 |
## nb_menages_2013 | 0.046 0.011 0.002 |
## nb_menages_2014 | 0.034 0.006 0.001 |
## nb_menages_2015 | 0.030 0.005 0.001 |
## nb_menages_2016 | 0.027 0.004 0.001 |
## nb_menages_2017 | 0.024 0.003 0.001 |
## nb_menages_2018 | 0.021 0.002 0.000 |
## nb_menages_2019 | 0.019 0.002 0.000 |
## nb_menages_2020 | 0.016 0.001 0.000 |
## nb_menages_2021 | 0.015 0.001 0.000 |
# explor(res)
fviz_pca_biplot(res, invisible = "ind")
fviz_pca_var(
res,
repel = TRUE,
col.var = "contrib"
)
fviz_pca_var(
res,
col.var = "contrib",
select.var = list(contrib = 5),
repel = TRUE
)
fviz_pca_var(
res,
col.var = "cos2",
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
repel = TRUE
)
fviz_pca_var(
res,
col.var = "cos2",
select.var = list(cos2 = 0.95),
repel = TRUE,
gradient.cols = c("#E7B800", "#FC4E07"),
)
df_large = dcast(df[annee %in% c(2015, 2016, 2017)],
formula = IRIS + COM ~ annee,
value.var = col_num)
df_num <- df_large[, .SD, .SDcols = is.numeric]
res <- PCA(df_num,
scale.unit = TRUE,
graph = FALSE)
## Warning in PCA(df_num, scale.unit = TRUE, graph = FALSE): Missing values are
## imputed by the mean of the variable: you should use the imputePCA function of
## the missMDA package
summary(res)
##
## Call:
## PCA(X = df_num, scale.unit = TRUE, graph = FALSE)
##
##
## Eigenvalues
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5 Dim.6 Dim.7
## Variance 36.762 10.563 5.311 4.098 2.755 2.168 1.591
## % of var. 53.279 15.308 7.696 5.940 3.992 3.141 2.305
## Cumulative % of var. 53.279 68.587 76.283 82.223 86.215 89.356 91.662
## Dim.8 Dim.9 Dim.10 Dim.11 Dim.12 Dim.13 Dim.14
## Variance 1.198 0.903 0.832 0.686 0.557 0.358 0.232
## % of var. 1.737 1.308 1.206 0.995 0.807 0.518 0.337
## Cumulative % of var. 93.399 94.707 95.913 96.907 97.715 98.233 98.570
## Dim.15 Dim.16 Dim.17 Dim.18 Dim.19 Dim.20 Dim.21
## Variance 0.221 0.098 0.085 0.059 0.052 0.049 0.046
## % of var. 0.321 0.142 0.124 0.086 0.075 0.070 0.067
## Cumulative % of var. 98.891 99.033 99.156 99.242 99.317 99.387 99.454
## Dim.22 Dim.23 Dim.24 Dim.25 Dim.26 Dim.27 Dim.28
## Variance 0.040 0.037 0.034 0.026 0.025 0.022 0.021
## % of var. 0.058 0.053 0.049 0.038 0.036 0.032 0.030
## Cumulative % of var. 99.512 99.565 99.614 99.652 99.688 99.720 99.750
## Dim.29 Dim.30 Dim.31 Dim.32 Dim.33 Dim.34 Dim.35
## Variance 0.018 0.018 0.017 0.015 0.015 0.013 0.012
## % of var. 0.026 0.025 0.024 0.022 0.021 0.018 0.018
## Cumulative % of var. 99.776 99.801 99.826 99.848 99.870 99.888 99.906
## Dim.36 Dim.37 Dim.38 Dim.39 Dim.40 Dim.41 Dim.42
## Variance 0.011 0.010 0.008 0.007 0.006 0.005 0.004
## % of var. 0.016 0.015 0.011 0.010 0.008 0.007 0.007
## Cumulative % of var. 99.921 99.936 99.947 99.957 99.966 99.973 99.979
## Dim.43 Dim.44 Dim.45 Dim.46 Dim.47 Dim.48 Dim.49
## Variance 0.004 0.003 0.002 0.001 0.001 0.001 0.001
## % of var. 0.005 0.005 0.002 0.002 0.002 0.001 0.001
## Cumulative % of var. 99.985 99.990 99.992 99.994 99.996 99.997 99.998
## Dim.50 Dim.51 Dim.52 Dim.53 Dim.54 Dim.55 Dim.56
## Variance 0.000 0.000 0.000 0.000 0.000 0.000 0.000
## % of var. 0.001 0.001 0.000 0.000 0.000 0.000 0.000
## Cumulative % of var. 99.999 100.000 100.000 100.000 100.000 100.000 100.000
## Dim.57 Dim.58 Dim.59 Dim.60 Dim.61 Dim.62 Dim.63
## Variance 0.000 0.000 0.000 0.000 0.000 0.000 0.000
## % of var. 0.000 0.000 0.000 0.000 0.000 0.000 0.000
## Cumulative % of var. 100.000 100.000 100.000 100.000 100.000 100.000 100.000
## Dim.64 Dim.65 Dim.66 Dim.67 Dim.68 Dim.69
## Variance 0.000 0.000 0.000 0.000 0.000 0.000
## % of var. 0.000 0.000 0.000 0.000 0.000 0.000
## Cumulative % of var. 100.000 100.000 100.000 100.000 100.000 100.000
##
## Individuals (the 10 first)
## Dist Dim.1 ctr cos2 Dim.2
## 1 | 4.824 | -4.410 0.004 0.836 | -0.834
## 2 | 8.224 | 6.920 0.010 0.708 | -2.192
## 3 | 10.343 | 9.321 0.017 0.812 | 0.346
## 4 | 14.271 | 11.117 0.025 0.607 | 8.272
## 5 | 4.773 | -3.707 0.003 0.603 | 0.054
## 6 | 7.284 | 4.815 0.005 0.437 | -3.433
## 7 | 1.710 | 0.110 0.000 0.004 | 0.540
## 8 | 3.058 | 0.651 0.000 0.045 | 2.192
## 9 | 3.677 | 0.240 0.000 0.004 | -1.335
## 10 | 4.910 | -4.403 0.004 0.804 | 1.319
## ctr cos2 Dim.3 ctr cos2
## 1 0.000 0.030 | -0.121 0.000 0.001 |
## 2 0.003 0.071 | -3.026 0.013 0.135 |
## 3 0.000 0.001 | -3.549 0.017 0.118 |
## 4 0.047 0.336 | -0.312 0.000 0.000 |
## 5 0.000 0.000 | -0.246 0.000 0.003 |
## 6 0.008 0.222 | -1.627 0.004 0.050 |
## 7 0.000 0.100 | -0.291 0.000 0.029 |
## 8 0.003 0.514 | -0.667 0.001 0.048 |
## 9 0.001 0.132 | -2.913 0.012 0.628 |
## 10 0.001 0.072 | -0.777 0.001 0.025 |
##
## Variables (the 10 first)
## Dim.1 ctr cos2 Dim.2 ctr cos2
## nb_menages_2015 | 0.974 2.580 0.948 | -0.098 0.092 0.010
## nb_menages_2016 | 0.978 2.603 0.957 | -0.093 0.081 0.009
## nb_menages_2017 | 0.976 2.589 0.952 | -0.088 0.074 0.008
## nb_personnes_menage_2015 | 0.952 2.468 0.907 | 0.119 0.134 0.014
## nb_personnes_menage_2016 | 0.954 2.474 0.910 | 0.121 0.139 0.015
## nb_personnes_menage_2017 | 0.949 2.451 0.901 | 0.119 0.135 0.014
## nb_logements_2015 | 0.843 1.934 0.711 | -0.111 0.117 0.012
## nb_logements_2016 | 0.849 1.963 0.722 | -0.109 0.113 0.012
## nb_logements_2017 | 0.850 1.964 0.722 | -0.109 0.112 0.012
## nb_RP_1_piece_2015 | 0.470 0.601 0.221 | -0.542 2.778 0.293
## Dim.3 ctr cos2
## nb_menages_2015 | 0.032 0.019 0.001 |
## nb_menages_2016 | 0.029 0.016 0.001 |
## nb_menages_2017 | 0.027 0.014 0.001 |
## nb_personnes_menage_2015 | -0.198 0.738 0.039 |
## nb_personnes_menage_2016 | -0.199 0.747 0.040 |
## nb_personnes_menage_2017 | -0.197 0.733 0.039 |
## nb_logements_2015 | 0.181 0.614 0.033 |
## nb_logements_2016 | 0.179 0.605 0.032 |
## nb_logements_2017 | 0.177 0.592 0.031 |
## nb_RP_1_piece_2015 | 0.443 3.704 0.197 |
fviz_pca_biplot(res, invisible = "ind")
fviz_pca_var(
res,
repel = TRUE,
col.var = "contrib"
)
fviz_pca_var(
res,
col.var = "contrib",
select.var = list(contrib = 5),
repel = TRUE
)
fviz_pca_var(
res,
col.var = "cos2",
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
repel = TRUE
)
fviz_pca_var(
res,
col.var = "cos2",
select.var = list(cos2 = 0.95),
repel = TRUE,
gradient.cols = c("#E7B800", "#FC4E07"),
)
df_large[, diff_pop_2015_2017 := nb_personnes_menage_2015 - nb_personnes_menage_2017]
df_large[, diff_log_2015_2017 := nb_logements_2015 - nb_logements_2017]
df_large[, diff_1_piece_2015_2017 := nb_RP_1_piece_2015 - nb_RP_1_piece_2017]
df_large[, diff_loc_2015_2017 := nb_RP_en_loc_2015 - nb_RP_en_loc_2017]
df_large[, diff_vacants_2015_2017 := nb_logements_vacants_2015 - nb_logements_vacants_2017]
df_large[, diff_actifs_occ_2015_2017 := nb_actifs_occ_2015 - nb_actifs_occ_2017]
df_num <- df_large[, .SD, .SDcols = is.numeric]
res <- PCA(df_num,
scale.unit = TRUE,
graph = FALSE)
## Warning in PCA(df_num, scale.unit = TRUE, graph = FALSE): Missing values are
## imputed by the mean of the variable: you should use the imputePCA function of
## the missMDA package
summary(res)
##
## Call:
## PCA(X = df_num, scale.unit = TRUE, graph = FALSE)
##
##
## Eigenvalues
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5 Dim.6 Dim.7
## Variance 36.894 10.565 5.312 4.140 3.166 2.712 2.065
## % of var. 49.192 14.087 7.083 5.521 4.221 3.616 2.753
## Cumulative % of var. 49.192 63.279 70.362 75.883 80.104 83.720 86.473
## Dim.8 Dim.9 Dim.10 Dim.11 Dim.12 Dim.13 Dim.14
## Variance 1.597 1.204 1.190 1.062 0.900 0.829 0.682
## % of var. 2.129 1.605 1.586 1.416 1.200 1.105 0.909
## Cumulative % of var. 88.603 90.208 91.794 93.211 94.411 95.516 96.425
## Dim.15 Dim.16 Dim.17 Dim.18 Dim.19 Dim.20 Dim.21
## Variance 0.550 0.426 0.356 0.247 0.228 0.166 0.129
## % of var. 0.733 0.568 0.475 0.330 0.304 0.222 0.172
## Cumulative % of var. 97.158 97.726 98.201 98.530 98.834 99.056 99.228
## Dim.22 Dim.23 Dim.24 Dim.25 Dim.26 Dim.27 Dim.28
## Variance 0.097 0.065 0.054 0.047 0.031 0.028 0.025
## % of var. 0.129 0.086 0.072 0.063 0.041 0.037 0.034
## Cumulative % of var. 99.357 99.444 99.516 99.579 99.620 99.657 99.691
## Dim.29 Dim.30 Dim.31 Dim.32 Dim.33 Dim.34 Dim.35
## Variance 0.024 0.022 0.019 0.018 0.017 0.016 0.015
## % of var. 0.032 0.030 0.025 0.024 0.023 0.021 0.020
## Cumulative % of var. 99.723 99.753 99.778 99.802 99.825 99.846 99.866
## Dim.36 Dim.37 Dim.38 Dim.39 Dim.40 Dim.41 Dim.42
## Variance 0.015 0.012 0.011 0.010 0.008 0.008 0.007
## % of var. 0.020 0.015 0.014 0.014 0.011 0.010 0.009
## Cumulative % of var. 99.886 99.901 99.916 99.929 99.940 99.950 99.960
## Dim.43 Dim.44 Dim.45 Dim.46 Dim.47 Dim.48 Dim.49
## Variance 0.006 0.005 0.005 0.003 0.003 0.001 0.001
## % of var. 0.008 0.007 0.006 0.005 0.004 0.002 0.002
## Cumulative % of var. 99.968 99.975 99.981 99.986 99.990 99.992 99.993
## Dim.50 Dim.51 Dim.52 Dim.53 Dim.54 Dim.55 Dim.56
## Variance 0.001 0.001 0.001 0.001 0.001 0.000 0.000
## % of var. 0.001 0.001 0.001 0.001 0.001 0.001 0.000
## Cumulative % of var. 99.995 99.996 99.997 99.998 99.999 99.999 100.000
## Dim.57 Dim.58 Dim.59 Dim.60 Dim.61 Dim.62 Dim.63
## Variance 0.000 0.000 0.000 0.000 0.000 0.000 0.000
## % of var. 0.000 0.000 0.000 0.000 0.000 0.000 0.000
## Cumulative % of var. 100.000 100.000 100.000 100.000 100.000 100.000 100.000
## Dim.64 Dim.65 Dim.66 Dim.67 Dim.68 Dim.69 Dim.70
## Variance 0.000 0.000 0.000 0.000 0.000 0.000 0.000
## % of var. 0.000 0.000 0.000 0.000 0.000 0.000 0.000
## Cumulative % of var. 100.000 100.000 100.000 100.000 100.000 100.000 100.000
## Dim.71 Dim.72 Dim.73 Dim.74 Dim.75
## Variance 0.000 0.000 0.000 0.000 0.000
## % of var. 0.000 0.000 0.000 0.000 0.000
## Cumulative % of var. 100.000 100.000 100.000 100.000 100.000
##
## Individuals (the 10 first)
## Dist Dim.1 ctr cos2 Dim.2
## 1 | 5.572 | -4.312 0.004 0.599 | -0.801
## 2 | 8.662 | 6.931 0.010 0.640 | -2.227
## 3 | 10.482 | 9.385 0.018 0.802 | 0.345
## 4 | 14.461 | 11.163 0.025 0.596 | 8.268
## 5 | 4.773 | -3.698 0.003 0.600 | 0.060
## 6 | 7.339 | 4.839 0.005 0.435 | -3.428
## 7 | 1.710 | 0.109 0.000 0.004 | 0.539
## 8 | 3.878 | 0.601 0.000 0.024 | 2.160
## 9 | 3.972 | 0.288 0.000 0.005 | -1.327
## 10 | 5.082 | -4.429 0.004 0.760 | 1.306
## ctr cos2 Dim.3 ctr cos2
## 1 0.000 0.021 | -0.100 0.000 0.000 |
## 2 0.003 0.066 | -3.044 0.013 0.123 |
## 3 0.000 0.001 | -3.558 0.017 0.115 |
## 4 0.047 0.327 | -0.329 0.000 0.001 |
## 5 0.000 0.000 | -0.244 0.000 0.003 |
## 6 0.008 0.218 | -1.633 0.004 0.050 |
## 7 0.000 0.099 | -0.292 0.000 0.029 |
## 8 0.003 0.310 | -0.693 0.001 0.032 |
## 9 0.001 0.112 | -2.906 0.012 0.536 |
## 10 0.001 0.066 | -0.780 0.001 0.024 |
##
## Variables (the 10 first)
## Dim.1 ctr cos2 Dim.2 ctr cos2
## nb_menages_2015 | 0.972 2.563 0.946 | -0.100 0.094 0.010
## nb_menages_2016 | 0.978 2.591 0.956 | -0.093 0.083 0.009
## nb_menages_2017 | 0.976 2.583 0.953 | -0.089 0.074 0.008
## nb_personnes_menage_2015 | 0.951 2.453 0.905 | 0.118 0.131 0.014
## nb_personnes_menage_2016 | 0.954 2.466 0.910 | 0.121 0.137 0.015
## nb_personnes_menage_2017 | 0.951 2.449 0.904 | 0.119 0.135 0.014
## nb_logements_2015 | 0.842 1.921 0.709 | -0.112 0.119 0.013
## nb_logements_2016 | 0.849 1.953 0.721 | -0.110 0.115 0.012
## nb_logements_2017 | 0.850 1.958 0.722 | -0.109 0.112 0.012
## nb_RP_1_piece_2015 | 0.469 0.596 0.220 | -0.542 2.780 0.294
## Dim.3 ctr cos2
## nb_menages_2015 | 0.031 0.018 0.001 |
## nb_menages_2016 | 0.029 0.016 0.001 |
## nb_menages_2017 | 0.027 0.014 0.001 |
## nb_personnes_menage_2015 | -0.198 0.741 0.039 |
## nb_personnes_menage_2016 | -0.199 0.747 0.040 |
## nb_personnes_menage_2017 | -0.197 0.730 0.039 |
## nb_logements_2015 | 0.180 0.611 0.032 |
## nb_logements_2016 | 0.179 0.603 0.032 |
## nb_logements_2017 | 0.177 0.592 0.031 |
## nb_RP_1_piece_2015 | 0.443 3.699 0.196 |
fviz_pca_biplot(res, invisible = "ind")
fviz_pca_var(
res,
repel = TRUE,
col.var = "contrib"
)
fviz_pca_var(
res,
col.var = "contrib",
select.var = list(contrib = 5),
repel = TRUE
)
fviz_pca_var(
res,
col.var = "cos2",
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
repel = TRUE
)
fviz_pca_var(
res,
col.var = "cos2",
select.var = list(cos2 = 0.95),
repel = TRUE,
gradient.cols = c("#E7B800", "#FC4E07"),
)
df_num_com <- df[annee==2017]
df_num <- df_num_com[, .SD, .SDcols = is.numeric]
df_num = df_num[,-"annee"]
res <- PCA(df_num,
scale.unit = TRUE,
graph = FALSE)
set.seed(123)
# on récupère les coordonnées
ind_coords <- res$ind$coord
ind_coords_sub <- ind_coords[, 1:2] # sélection des axes 1 et 2
km <- kmeans(ind_coords_sub, centers = 20, nstart = 25)
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
df_cluster <- df_num_com
df_cluster$cluster <- km$cluster
fviz_cluster(km, data = ind_coords_sub,
ellipse.type = "convex",
geom = "point",
palette = "jco",
repel = TRUE)
## Warning: This manual palette can handle a maximum of 10 values. You have supplied 20
## This manual palette can handle a maximum of 10 values. You have supplied 20
## Warning: Removed 7806 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 10 rows containing missing values or values outside the scale range
## (`geom_point()`).
set.seed(123)
# on récupère les coordonnées
ind_coords <- res$ind$coord
ind_coords_sub <- ind_coords[, 1:2] # sélection des axes 1 et 2
km <- kmeans(ind_coords_sub, centers = 100, nstart = 25)
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
df_cluster <- df_num_com
df_cluster$cluster <- km$cluster
fviz_cluster(km, data = ind_coords_sub,
ellipse.type = "convex",
geom = "point",
palette = "jco",
repel = TRUE)
## Warning: This manual palette can handle a maximum of 10 values. You have supplied 100
## This manual palette can handle a maximum of 10 values. You have supplied 100
## Warning: Removed 12164 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 90 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '26'
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '26'
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '27'
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '27'
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '28'
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '28'
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '29'
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '29'
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '30'
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '30'
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '31'
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '31'
Même avec 100 clusters on a les 100 clusters à Lille ou dans le 19e arrondissement de Paris.
data_com = df[, lapply(.SD, sum), by = c("COM", "annee"), .SDcols = is.numeric]
df_com_2017 <- data_com[annee==2017]
df_num <- df_com_2017[, .SD, .SDcols = is.numeric]
df_num = df_num[,-"annee"]
res <- PCA(df_num,
scale.unit = TRUE,
graph = FALSE)
fviz_pca_var(
res,
repel = TRUE,
col.var = "contrib"
)
fviz_pca_var(
res,
col.var = "contrib",
select.var = list(contrib = 5),
repel = TRUE
)
set.seed(123)
# on récupère les coordonnées
ind_coords <- res$ind$coord
ind_coords_sub <- ind_coords[, 1:2] # sélection des axes 1 et 2
km <- kmeans(ind_coords_sub, centers = 10, nstart = 25)
df_cluster <- df_com_2017
df_cluster$cluster <- km$cluster
fviz_cluster(km, data = ind_coords_sub,
ellipse.type = "convex",
geom = "point",
palette = "jco",
repel = TRUE)
df_cluster[, Paris := fifelse(substr(COM, 1, 3) == "751", 1, 0)]
print(df_cluster[Paris == 1, c("COM", "cluster")])
## COM cluster
## <char> <int>
## 1: 75101 7
## 2: 75102 7
## 3: 75103 1
## 4: 75104 1
## 5: 75105 6
## 6: 75106 1
## 7: 75107 6
## 8: 75108 1
## 9: 75109 6
## 10: 75110 8
## 11: 75111 2
## 12: 75112 10
## 13: 75113 2
## 14: 75114 10
## 15: 75115 2
## 16: 75116 2
## 17: 75117 2
## 18: 75118 2
## 19: 75119 2
## 20: 75120 2
## COM cluster
print(length(unique(df_cluster[Paris == 1, cluster])))
## [1] 6
km <- kmeans(ind_coords_sub, centers = 30, nstart = 25)
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
df_cluster <- df_com_2017
df_cluster$cluster <- km$cluster
fviz_cluster(km, data = ind_coords_sub,
ellipse.type = "convex",
geom = "point",
palette = "jco",
repel = TRUE)
## Warning: This manual palette can handle a maximum of 10 values. You have
## supplied 30
## Warning: This manual palette can handle a maximum of 10 values. You have
## supplied 30
## Warning: Removed 711 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '26'
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '26'
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '27'
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '27'
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '28'
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '28'
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '29'
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '29'
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '30'
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '30'
df_cluster[, Paris := fifelse(substr(COM, 1, 3) == "751", 1, 0)]
print(df_cluster[Paris == 1, c("COM", "cluster")])
## COM cluster
## <char> <int>
## 1: 75101 4
## 2: 75102 8
## 3: 75103 10
## 4: 75104 8
## 5: 75105 6
## 6: 75106 10
## 7: 75107 6
## 8: 75108 10
## 9: 75109 6
## 10: 75110 20
## 11: 75111 12
## 12: 75112 2
## 13: 75113 12
## 14: 75114 2
## 15: 75115 15
## 16: 75116 12
## 17: 75117 12
## 18: 75118 12
## 19: 75119 12
## 20: 75120 12
## COM cluster
print(length(unique(df_cluster[Paris == 1, cluster])))
## [1] 8